{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 更多数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "``datasets``中的数据集：[http://scikit-learn.org/stable/datasets/index.html](http://scikit-learn.org/stable/datasets/index.html)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MNIST数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import fetch_mldata\n",
    "mnist = fetch_mldata('MNIST original')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(70000, 784)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mnist.data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(70000,)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mnist.target.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       ..., \n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mnist.data[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  51, 159, 253,\n",
       "       159,  50,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  48, 238,\n",
       "       252, 252, 252, 237,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  54,\n",
       "       227, 253, 252, 239, 233, 252,  57,   6,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  10,\n",
       "        60, 224, 252, 253, 252, 202,  84, 252, 253, 122,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0, 163, 252, 252, 252, 253, 252, 252,  96, 189, 253, 167,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,  51, 238, 253, 253, 190, 114, 253, 228,  47,  79, 255,\n",
       "       168,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,  48, 238, 252, 252, 179,  12,  75, 121,  21,   0,\n",
       "         0, 253, 243,  50,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,  38, 165, 253, 233, 208,  84,   0,   0,   0,\n",
       "         0,   0,   0, 253, 252, 165,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   7, 178, 252, 240,  71,  19,  28,   0,\n",
       "         0,   0,   0,   0,   0, 253, 252, 195,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,  57, 252, 252,  63,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0, 253, 252, 195,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0, 198, 253, 190,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0, 255, 253, 196,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  76, 246, 252,\n",
       "       112,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 253, 252,\n",
       "       148,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  85,\n",
       "       252, 230,  25,   0,   0,   0,   0,   0,   0,   0,   0,   7, 135,\n",
       "       253, 186,  12,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,  85, 252, 223,   0,   0,   0,   0,   0,   0,   0,   0,   7,\n",
       "       131, 252, 225,  71,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,  85, 252, 145,   0,   0,   0,   0,   0,   0,   0,\n",
       "        48, 165, 252, 173,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,  86, 253, 225,   0,   0,   0,   0,   0,\n",
       "         0, 114, 238, 253, 162,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,  85, 252, 249, 146,  48,  29,\n",
       "        85, 178, 225, 253, 223, 167,  56,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,  85, 252, 252, 252,\n",
       "       229, 215, 252, 252, 252, 196, 130,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  28, 199,\n",
       "       252, 252, 253, 252, 252, 233, 145,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,  25, 128, 252, 253, 252, 141,  37,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
       "         0,   0,   0,   0], dtype=uint8)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mnist.data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAABvBJREFUeJzt3V9ozv0fx/FrupOiLeGuKTlyzjhyso0TSRygOVgpKVEo\n5EAOFg7kQCkOHJrypyRqjnFEK2tyttMpDqS2RJPa7/h3cL2v3dtcZq/H4/Tlu+vbzbPvwefe9+qY\nnZ1tAHlW/OkbAP4M8UMo8UMo8UMo8UMo8UMo8UMo8UMo8UOof9r8ef53Qvj9Oubyhzz5IZT4IZT4\nIZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4\nIZT4IZT4IZT4IZT4IZT4IZT4IZT4IZT4IVS7v6KbZebdu3flfvv27abbvXv3ymuPHj1a7qdPny73\nnp6eck/nyQ+hxA+hxA+hxA+hxA+hxA+hxA+hOmZnZ9v5eW39MBZufHy83Pv7+8t9enp6MW/n/3R1\ndZX7169ff9tnL3Edc/lDnvwQSvwQSvwQSvwQSvwQSvwQSvwQyu/zhxsdHS33gwcPlvvU1FS5d3Q0\nP3Lu7Owsr125cmW5f/nypdzfvHnTdNu+ffuCPns58OSHUOKHUOKHUOKHUOKHUOKHUH6ldxn4/v17\n021sbKy8dnBwsNwnJyfLvdW/n+qor9Vx28WLF8t9YGCg3Kt7u3btWnntpUuXyn2J8yu9QHPih1Di\nh1Dih1Dih1Dih1Dih1B+pXcZOHHiRNPtwYMHbbyT/6bV13t/+/at3Ht7e8v91atXTbcPHz6U1ybw\n5IdQ4odQ4odQ4odQ4odQ4odQ4odQzvn/Aq3Ow0dGRppuC31fQ19fX7nv27ev3C9cuNB027hxY3nt\ntm3byn3t2rXl/vLly6Zbm99jsSR58kMo8UMo8UMo8UMo8UMo8UMo8UMo7+1fAsbHx8u9v7+/3Ken\np+f92Xv37i33hw8flnv1O/ONRv1788ePHy+v3bBhQ7m3smJF82fb6tWry2tfv35d7j09PfO6pzbx\n3n6gOfFDKPFDKPFDKPFDKPFDKPFDKOf8bTAxMVHuQ0ND5f7o0aNyr87Du7u7y2svX75c7ocOHSr3\npaw65+/oqI/CBwYGyn0pfx9Cwzk/UBE/hBI/hBI/hBI/hBI/hPLq7kUwMzNT7tXrqxuNRuPFixfl\n3tnZWe7Dw8NNtx07dpTX/vjxo9xTTU5O/ulb+O08+SGU+CGU+CGU+CGU+CGU+CGU+CGUc/5FMDY2\nVu6tzvFbef78ebn39vYu6OeTyZMfQokfQokfQokfQokfQokfQokfQjnnXwTnzp0r91avR+/r6yt3\n5/jzs5DX0rf5lfZ/hCc/hBI/hBI/hBI/hBI/hBI/hBI/hHLOP0cjIyNNt/Hx8fLaVl8HvX///nnd\nE7Xqv3urv5OtW7cu9u0sOZ78EEr8EEr8EEr8EEr8EEr8EEr8EMo5/xxV32P/8+fP8tp///233AcG\nBuZ1T8vdzMxMuQ8NDc37Z+/evbvcr1+/Pu+f/bfw5IdQ4odQ4odQ4odQ4odQ4odQjvraYNWqVeXe\n3d3dpjtZWlod5V27dq3cb9y4Ue6bNm1qup0/f768ds2aNeW+HHjyQyjxQyjxQyjxQyjxQyjxQyjx\nQyjn/G2Q/Gru6rXmrc7pHz9+XO4HDhwo96dPn5Z7Ok9+CCV+CCV+CCV+CCV+CCV+CCV+COWcf45m\nZ2fntTUajcazZ8/K/datW/O6p6Xg5s2b5X716tWm29TUVHnt4OBguQ8PD5c7NU9+CCV+CCV+CCV+\nCCV+CCV+CCV+COWcf446OjrmtTUajcbnz5/L/cyZM+V+7Nixcl+3bl3T7e3bt+W19+/fL/f379+X\n++TkZLlv3ry56bZnz57y2lOnTpU7C+PJD6HED6HED6HED6HED6HED6Ec9bXBr1+/yv3OnTvl/uTJ\nk3Lv6upquk1MTJTXLtTOnTvLfdeuXU23K1euLPbt8B948kMo8UMo8UMo8UMo8UMo8UMo8UOojlav\nnV5kbf2wxfTx48em2+HDh8trR0dHF/TZrf6OWv1KcWX9+vXlfuTIkXL/m187vozN6R+EJz+EEj+E\nEj+EEj+EEj+EEj+EEj+Ecs6/CD59+lTud+/eLffqa6wbjYWd8589e7a89uTJk+W+ZcuWcmdJcs4P\nNCd+CCV+CCV+CCV+CCV+CCV+COWcH5Yf5/xAc+KHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKH\nUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKHUOKH\nUP+0+fPm9NXBwO/nyQ+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+h\nxA+hxA+hxA+hxA+hxA+hxA+hxA+hxA+h/gcSHiIylvXweQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x115233748>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "digit = mnist.data[0]\n",
    "digit_image = digit.reshape(28, 28)\n",
    "\n",
    "plt.imshow(digit_image, cmap=matplotlib.cm.binary)\n",
    "plt.axis(\"off\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mnist.target[0]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
